###########################################################
# Kerice Doten-Snitker
# 
# Climate data, from CCSM 4.0
#
# Script based on scripts from N. Gauthier
# https://github.com/nick-gauthier/Risk-Resilience/blob/master/CCSM4%20Europe%20mh6k.Rmd
# https://github.com/nick-gauthier/Risk-Resilience/blob/master/climate-analysis.Rmd
# for https://doi.org/10.1016/j.quascirev.2017.09.015 
#
# Built under R 4.1.3
###########################################################

# set locale to preempt UTF-8 issues with the mydata
#Sys.getlocale()
# for Mac
Sys.setlocale(category = "LC_ALL", locale = "en_US.UTF-8")
#for windows
#Sys.setlocale(category = "LC_ALL", locale = "English_United States.1252")

# add packages with the pacman library, which installs if not installed
library(pacman)
# use the here package for improving replicability
p_load(here)
# for data manipulation
p_load(magrittr, plyr, tidyverse, reshape2)
# for GIS functions
p_load(ncdf4) # import global climate model data
p_load(rgdal) # read GCM data
p_load(raster, terra) # process GCM data - raster is on its way out, and terra on its way in, but raster still deals with netCDFs much better
#p_load(rasterVis) # plotting GCM data
#p_load(EMD) # calculate trends in the data

set.seed(1418)

###
# Observations are cities in which Jews lived 1000-1520, measured annually
###


#### CCSM 4.0 Import and Preprocessing ------------------------------

# Raw, annual rasters can be downloaded at http://www.cesm.ucar.edu/models/ccsm4.0/
# I use the 850-1850 climate model, 
# which includes monthly observations at a 1 degree frequency.
# CCSM 4.0 raster data used here were downloaded, compiled, and shared by Nick Gauthier.
# The coordinate reference system (CRS) for the rasterbricks is WGS84.

# First, import data from the CCSM 4.0 paleoclimate simulation. Convert the precipitation values to mm/year and temperature values to degrees Celsius.  
# Economists generally use data from Guiot, Corona, & ESCARCEL (https://doi.org/10.1371/journal.pone.0009972), which is growing season temps (April-Sept). Limit to these months with time = YYYY-[05:10]-01.

# total monthly precipitation
CCSM.prc <- brick(here::here("GIS_data", "Rasters", "b40.lm850-1850.1deg.001.cam2.h0.PRECT.085001-185012-005.nc")) %>%
  crop(extent(0, 15, 40, 55)) %>% # crop to study area
  subset(grep("\\.(0[5-9]|10)\\.", names(.), value=FALSE)) %>% # growing season
  multiply_by(2629743830) # convert to mm

CCSM.prc.ann <- CCSM.prc %>%
  stackApply(indices = c(rep(1:1001,each=6)), fun = mean)
names(CCSM.prc.ann) <- paste("prc",seq(850,1850,1),sep="-")

# mean monthly temp
CCSM.t <- brick(here::here("GIS_data", "Rasters","b40.lm850-1850.1deg.001.cam2.h0.TREFHT.085001-185012-003.nc")) %>%
  crop(extent(0, 15, 40, 55)) %>%
  subset(grep("\\.(0[5-9]|10)\\.", names(.), value=FALSE)) %>% # growing season
  subtract(273.15) # convert from kelvin to C

CCSM.t.ann <- CCSM.t %>%
  stackApply(indices = c(rep(1:1001,each=6)), fun = mean)
names(CCSM.t.ann) <- paste("prc",seq(850,1850,1),sep="-")

# # max monthly temp
# CCSM.tmx <- brick(here::here("GIS_data", "Rasters","b40.lm850-1850.1deg.001.cam2.h0.TREFMXAV.085001-185012-002.nc")) %>%
#   crop(extent(0, 15, 40, 55)) %>%
#   subtract(273.15) # convert from kelvin to C
# 
# # min monthly temp
# CCSM.tmn <- brick(here::here("GIS_data", "Rasters","b40.lm850-1850.1deg.001.cam2.h0.TREFMNAV.085001-185012-004.nc")) %>%
#   crop(extent(0, 15, 40, 55)) %>%
#   subtract(273.15) # convert from kelvin to C

#### Sample Locations ----------------------------------------------

# Import the matrix with the coordinates for the cities
# use version where CRS is WGS84

points_coords_WGS84_sp <- readOGR(here::here("GIS_data","Vectors", "Haver_Kartenwerk_citiesmaster"), "settlements", GDAL1_integer64_policy = TRUE, stringsAsFactors=FALSE)
cities <- points_coords_WGS84_sp %>%
  data.frame() %>% # extract all the data
  dplyr::select(-coords.x1, -coords.x2, -optional) %>% # drop the dup'd coordinate columns
  mutate(newID = paste("ID", PlaceID, sep="_")) # make an ID field that isn't numeric

# Having an ID field that isn't numeric lets us use those IDs as row and column names, which we will need when manipulating the climate rasterbricks
crs_WGS84 <- CRS(SRS_string = "EPSG:4326")
cities_sp <- cities
coordinates(cities_sp) <- ~ XCOORD + YCOORD
projection(cities_sp) <- crs_WGS84

#### Extract values for cities sample ------------------------------

# Then extract temperature and precipitation values at each city by using **raster's** *extract* function to get the values from the climate model brick.

# Pull all the CCSM data into one data frame, with one row per year, and one column per variable/location combination. First *rbind* the two sets of CCSM data and *transpose* the results, turning the rows into columns. Add a column for the Year.

CCSM.prc.ann.mat <- data.frame(cities$newID,
                               raster::extract(CCSM.prc.ann, cities_sp, layer=1)) %>%
  # adds row name "cities.newID" - will introduce NA's in "Year" later, disregard
  t %>% # transpose: years in rows, cities in columns
  as.data.frame %>% # transpose made this a matrix - convert type back
  setNames(.,as.character(unlist(.[1,]))) %>% # set as col names from newID row
  rownames_to_column('Year') %>%
  mutate(Year = as.numeric(substring(Year, 5))) %>% # Year starts in 5th char of name
  slice(-1) %>% # drop row of newID
  lapply(function(x) if (is.factor(x)) as.numeric(as.character(x)) 
         else {x}) %>% # transposing made factors b/c string with num
  as.data.frame # convert type back from list made with lapply

#saveRDS(CCSM.prc.ann.mat, here::here("in_data", "climate_prc_annual.rds"))

CCSM.t.ann.mat <- data.frame(cities$newID,
                               raster::extract(CCSM.t.ann, cities_sp, layer=1)) %>%
  t %>% # transpose: years in rows, cities in columns
  as.data.frame %>% # transpose made this a matrix - convert type back
  setNames(.,as.character(unlist(.[1,]))) %>% # set as col names from newID row
  rownames_to_column('Year') %>%
  mutate(Year = as.numeric(substring(Year, 5))) %>% # Year starts in 5th char of name
  slice(-1) %>% # drop row of newID
  lapply(function(x) if (is.factor(x)) as.numeric(as.character(x)) 
         else {x}) %>% # transposing made factors b/c string with num
  as.data.frame # convert type back from list made with lapply

#saveRDS(CCSM.t.ann.mat, here::here("in_data", "climate_t_annual.rds"))

climate <- merge(CCSM.prc.ann.mat %>%
  gather(key = "newID", value = "prc", starts_with("ID")),
  CCSM.t.ann.mat %>%
    gather(key = "newID", value = "temp", starts_with("ID")),
  by.x=c("Year","newID"), by.y=c("Year","newID"), all.x=TRUE, all.y=TRUE)
  
write_csv(climate, here::here("in_data", "CCSM_climate_annual.csv"))
